
import os
import sys

# Resolve the project root (two directory levels above this script) and make
# sure it is importable, so the `src` package imported below can be found
# regardless of the current working directory.
_script_dir = os.path.dirname(__file__)
BASE_DIR = os.path.abspath(os.path.join(_script_dir, "../../"))
if BASE_DIR not in sys.path:
    # Insert at the front so this project root takes precedence on lookup.
    sys.path.insert(0, BASE_DIR)

from src.missing.observe import (
    read_dataset,
    missing_value_analysis_overall,
    missing_value_analysis_by_country,
    missing_value_analysis_by_year,
    save_to_csv
)

# Path configuration: the input workbook and the output directory both live
# under the same "2_missing_value_analysis" data directory.
_stage_dir = os.path.join(BASE_DIR, "data", "2_missing_value_analysis")
input_file = os.path.join(_stage_dir, "original", "merged_indicators.xlsx")
output_dir = os.path.join(_stage_dir, "missing_stats")
# Create the output directory up front; no error if it already exists.
os.makedirs(output_dir, exist_ok=True)

# Load the dataset.
df = read_dataset(input_file)

# Run the three missing-value analyses (overall, by country, by year).
overall = missing_value_analysis_overall(df)
by_country = missing_value_analysis_by_country(df)
by_year = missing_value_analysis_by_year(df)

# Save each result as a CSV file in the output directory, in the same order
# the analyses were computed.
for _result, _csv_name in (
    (overall, "missing_overall.csv"),
    (by_country, "missing_by_country.csv"),
    (by_year, "missing_by_year.csv"),
):
    save_to_csv(_result, os.path.join(output_dir, _csv_name))
